# Simulation -- Unpacking

# Show that when two numbers add up to 100, they do not split into more evens
# Create a fake data set of Benford-conforming numbers
# Set up a data frame with random districts 

# Fix the RNG seed so the simulated data, and everything derived from it later
# in the script, is identical from run to run. Change the seed to draw a
# different sample.
set.seed(20240101)

N <- 50000
Data <- data.frame(matrix(ncol = 0, nrow = N))

## Create a lot of Benford-conforming data

# Draw `n` Benford-conforming whole numbers between 2 and 4 digits long.
#
# The mantissa is 10^U with U uniform on (0, 1), i.e. its log10 is uniform,
# which is what yields Benford-distributed leading digits. It is then scaled
# by 10, 100 or 1000 (chosen with equal probability) and truncated, giving
# values in [10, 9999].
simulate_benford <- function(n) {
  mantissa <- 10^runif(n)
  power <- sample(1:3, n, replace = TRUE)
  floor(mantissa * 10^power)
}

Data$BenM <- simulate_benford(N)
Data$BenF <- simulate_benford(N)

# Create the sum of the data
Data$BenTot <- Data$BenM + Data$BenF

# How many totals are round, i.e. multiples of 10?
DataRound <- Data[Data$BenTot %% 10 == 0, ]
# Printed explicitly so the count also shows up when the script is source()d.
print(nrow(DataRound))
# About 10% of N

library(digitanalysis)
library(ggplot2)



# Hand the simulated data frame to digitanalysis, registering the total and
# both component columns as digit columns.
DataProcessed <- process_digit_data(
  raw_df = Data,
  digit_columns = c("BenTot", "BenM", "BenF")
)

# Run the unpacking test with BenTot as the rounding split column and the two
# components as the analysis columns.
# NOTE(review): the precise meaning of skip_first_digit, omit_05 and
# suppress_first_division_plots is defined by digitanalysis -- confirm against
# the package documentation before changing them.
unpack <- unpack_round_numbers_test(
  digitdata = DataProcessed,
  rounding_split_column = "BenTot",
  analysis_columns = c("BenM", "BenF"),
  skip_first_digit = TRUE,
  omit_05 = c(0, 5),
  suppress_first_division_plots = TRUE,
  plot = TRUE
)


# Attach gridExtra before any graphics device is opened, so a missing package
# fails early instead of leaving an open device and an empty PDF behind.
library(gridExtra)

# Apply the shared formatting to one aggregate barplot: centred title, y axis
# shown from 0% to 20% in 5-point steps, and no legend.
style_unpack_plot <- function(barplot, title) {
  barplot +
    ggtitle(title) +
    theme(plot.title = element_text(hjust = 0.5)) +
    coord_cartesian(ylim = c(0, 0.2)) +
    scale_y_continuous(
      labels = scales::percent,
      breaks = seq(0, 0.2, by = 0.05)
    ) +
    theme(legend.position = "none")
}

# Descriptive names instead of `t` / `s`: `t` masks base::t() (transpose).
unrounded_plot <- style_unpack_plot(
  unpack$plots$unround$AllBreakout$AllCategory$aggregate_barplot,
  "Unrounded"
)
rounded_plot <- style_unpack_plot(
  unpack$plots$round$AllBreakout$AllCategory$aggregate_barplot,
  "Rounded"
)

# Rounded on the left, unrounded on the right. The device is closed in
# `finally` so a drawing error cannot leave the PDF device open.
pdf("UnpackRound.pdf")
invisible(tryCatch(
  grid.arrange(rounded_plot, unrounded_plot, ncol = 2),
  finally = dev.off()
))

# For p-values

# Write the printed p-values to P-Values.txt. capture.output() prints the value
# the same way the top level would, so the file is filled even when this script
# is run through source(), where a bare expression between sink() calls is not
# auto-printed. It also removes the output diversion again if printing fails.
capture.output(unpack$p_values, file = "P-Values.txt")







